library(tidyverse)
library(ggthemes)
library(lubridate)
library(ggpubr)
library(sf)
library(sp)
library(rgdal)
library(patchwork)
library(raster)
library(cowplot)
library(tidylog)
library(ggbeeswarm)

# Read the analysis inputs from disk.
# NOTE(review): load() restores objects under the names they were saved with;
# the .Rdata file presumably provides `MPtweets`, which is used below -- confirm.
load(file = "data/analysis/MPtweetsv2.Rdata")
MP_panel <- readRDS(file = "data/analysis/MP_panelv2.RDS")
fff_events <- read_csv(file = "data/output/gss_FFF_events.csv")
speeches <- read_csv(file = "data/output/speeches.csv")
# Used further down as the `pattern` argument of grepl()
climterms <- readRDS(file = "data/output/climgenterms.rds")

#GET COUNT OF CLIMATE TWEETS AND SPEECHES BY YEAR
# A tweet/speech counts as "climate" when its text matches `climterms`
# (case-insensitive).
# NOTE(review): grepl() uses a single pattern; `climterms` is assumed to be one
# regular-expression string -- confirm against the RDS contents.
MPtweets$year <- year(MPtweets$date)

# One row per calendar year: number of matching tweets
twtyrcnts <- MPtweets %>%
  mutate(obs = 1) %>%
  filter(grepl(climterms, tweet, ignore.case = TRUE)) %>%
  group_by(year) %>%
  summarise(sum_ctweets = sum(obs))

speeches$year <- year(speeches$speech_date)

# One row per calendar year: number of matching speeches
spchyrcnts <- speeches %>%
  mutate(obs = 1) %>%
  filter(grepl(climterms, speech_text, ignore.case = TRUE)) %>%
  group_by(year) %>%
  summarise(sum_cspchs = sum(obs))

#PLOT TWEETS AND SPEECHES

# Collapse the panel to one row per day and MP (username), summing the climate
# tweet and climate speech counts, and add 0/1 indicators (numeric and factor)
# for "at least one climate tweet/speech on that day".
twtspchcnts <- MP_panel %>%
  group_by(date, username) %>%
  summarise(sum_ctwts = sum(sum_ctweets),
            sum_cspchs = sum(sum_cspchs)) %>%
  # summarise() only removes the last grouping variable, so the result would
  # otherwise stay grouped by `date` and every later verb would run per day.
  ungroup() %>%
  mutate(ctwt = ifelse(sum_ctwts > 0, 1, 0),
         cspch = ifelse(sum_cspchs > 0, 1, 0),
         ctwtfac = as.factor(ctwt),
         cspchfac = as.factor(cspch))

# Rank MPs separately by their total climate tweets and total climate speeches.
# rank() is ascending (smallest total gets rank 1); ties.method = "first" breaks
# ties by row order so every MP receives a unique rank.
twtspchrnks <- twtspchcnts %>%
  group_by(username) %>%
  summarise(sum_ctwts = sum(sum_ctwts),
            sum_cspchs = sum(sum_cspchs)) %>%
  mutate(rank_twts = rank(sum_ctwts, ties.method = "first"),
         rank_spchs = rank(sum_cspchs, ties.method = "first")) %>%
  dplyr::select(username, rank_twts, rank_spchs)

# Time-series tile plots: x = day, y = one row per MP (ordered by rank),
# fill = whether the MP had at least one climate tweet (g2) / speech (g4) that day.

# Layers shared by both tile plots. Returned as a fresh list on every call so
# the two plots do not share layer/scale objects.
mp_day_tile_layers <- function() {
  list(
    geom_tile(),
    scale_x_date(date_labels = "%Y", date_breaks = "year"),
    # first value -> first factor level, second value -> second factor level
    scale_fill_manual(values = c("white", "darkgreen")),
    theme_tufte(base_size = 10, base_family = "Helvetica"),
    theme(legend.position = "none",
          axis.text.x = element_text(size = 15),
          axis.title.x = element_text(size = 15),
          axis.text.y = element_blank(),
          axis.title.y = element_text(size = 15),
          axis.ticks = element_line(size = 0.1),
          plot.title = element_blank(),
          panel.border = element_rect(colour = "black", fill = NA, size = 1))
  )
}

# ungroup() before building the rank factor: twtspchcnts may still be grouped by
# `date`, in which case as.factor() would be evaluated once per day and the
# combined level order would follow first appearance instead of rank order.
g2 <- twtspchcnts %>%
  left_join(twtspchrnks, by = "username") %>%
  ungroup() %>%
  mutate(rank = as.factor(rank_twts)) %>%
  ggplot(aes(date, rank, fill = ctwtfac)) +
  mp_day_tile_layers() +
  labs(x = "Date", y = "MP:Day, Clim. tweet", title = "")

g4 <- twtspchcnts %>%
  left_join(twtspchrnks, by = "username") %>%
  ungroup() %>%
  mutate(rank = as.factor(rank_spchs)) %>%
  ggplot(aes(date, rank, fill = cspchfac)) +
  mp_day_tile_layers() +
  labs(x = "Date", y = "MP:Day, Clim. spch", title = "")

# Total number of FFF events per day, summed over all rows of the panel
cntfff <- summarise(
  group_by(MP_panel, date),
  sum_events = sum(sum_fff_events)
)

# Bar chart of the daily FFF event totals (bar heights are taken directly from
# sum_events via stat = "identity")
g0 <- ggplot(data = cntfff) +
  geom_bar(
    mapping = aes(x = date, y = sum_events),
    stat = "identity",
    col = "darkgreen",
    size = 1
  ) +
  labs(x = "Date", y = "# Events", title = "#FFF Events") +
  theme_tufte(base_size = 10, base_family = "Helvetica") +
  theme(
    plot.title = element_text(size = 20, face = "bold"),
    axis.title.x = element_text(size = 15),
    axis.title.y = element_text(size = 15),
    axis.text.x = element_text(size = 15),
    axis.text.y = element_text(size = 15),
    axis.ticks.x = element_blank(),
    axis.ticks.y = element_blank()
  )

# Load the constituency lookup and the two shapefiles.
# NOTE(review): cons_codes.RData presumably restores `cons_codes`, which is
# merged with the event counts further down -- confirm.
load("data/output/cons_codes.RData")
fffs <- st_read("data/shapefiles/fff_shp/fff_UK_formatted.shp")
gbc <- st_read("data/shapefiles/GBR_con/GBR_con_formatted.shp")

# Simplify the constituency boundaries before plotting/overlay
gbc_simp <- st_simplify(gbc,
                        dTolerance = 2000,
                        preserveTopology = TRUE)

# Flatten the FFF events to a plain data frame: pull the first and second
# coordinate of every geometry into x / y and give the truncated shapefile
# field names (locatin, evnt_ty) readable names.
fffs_ll <- fffs %>%
  mutate(x = unlist(map(geometry, 1)),
         y = unlist(map(geometry, 2)),
         location = locatin,
         event_type = evnt_ty)
st_geometry(fffs_ll) <- NULL
fffs_ll <- fffs_ll %>%
  dplyr::select(x, y, town, location, time, date, url, event_type)

# Keep characters 6-17 of the raw date string and parse them as day-month-year.
# NOTE(review): assumes the raw value starts with a 5-character prefix (e.g. a
# weekday abbreviation) before the day -- confirm against the shapefile.
fffs_ll$date <- substring(fffs_ll$date, 6, 17)
fffs_ll$date <- as.Date(parse_date_time(fffs_ll$date, orders = "dmy"))

# Restrict to the observation window and save the formatted events
fffs_ll <- fffs_ll %>%
  filter(date >= as.Date("2017-01-01") & date <= as.Date("2020-03-05"))
write_csv(fffs_ll, "data/output/fff_events_formatted.csv")

# Build an sp points object from the formatted FFF events
fff_coords <- cbind(fffs_ll$x, fffs_ll$y)
fff_pts <- SpatialPointsDataFrame(fff_coords, data = fffs_ll)

#get GB constituency map as SpatialPolygonsDataFrame
gbc_spdf <- as_Spatial(gbc_simp)
proj4_gbc <- proj4string(gbc_spdf)

# Bring the FFF points into the CRS of the GB map.
# The coordinates were extracted from `fffs`, so they are expressed in the CRS
# that shapefile was read with. Label the points with that CRS and let
# spTransform() reproject them; simply stamping the map's CRS onto the points
# would relabel the coordinates without converting them. When both layers
# already share a CRS the transform leaves the coordinates unchanged.
fff_src_crs <- st_crs(fffs)
if (is.na(fff_src_crs)) {
  # No CRS recorded for the events: fall back to assuming they are already in
  # the map's CRS.
  crs(fff_pts) <- proj4_gbc
} else {
  proj4string(fff_pts) <- CRS(fff_src_crs$proj4string)
}
fff_pts_spdf <- spTransform(fff_pts, CRS(proj4_gbc))
# #plot
# plot(gbc_spdf)
# plot(fff_pts_spdf, col="red" , add=TRUE)

# For every event, look up the attributes of the constituency polygon it falls
# in (rows are NA for events outside all polygons)
pincon_FFF <- over(fff_pts_spdf, gbc_spdf)

# Count events per constituency code; events that matched no polygon are dropped
GSS_sums_FFF <- pincon_FFF %>%
  mutate(obs = 1) %>%
  group_by(pcon17cd) %>%
  summarise(sum_FFF_events = sum(obs)) %>%
  mutate(gss_code = pcon17cd) %>%
  dplyr::select(sum_FFF_events, gss_code) %>%
  na.omit()

# Keep every constituency in cons_codes; those without any event get a count of 0
GSS_sums_FFFm <- merge(cons_codes, GSS_sums_FFF, by = "gss_code", all.x = TRUE)
GSS_sums_FFFm$sum_FFF_events <- ifelse(is.na(GSS_sums_FFFm$sum_FFF_events),
                                       0, GSS_sums_FFFm$sum_FFF_events)

# Map of FFF event counts per constituency, drawn from the geometries carried by
# GSS_sums_FFFm. Counts are converted to a factor so each distinct count gets
# its own discrete fill colour.
# NOTE(review): the manual scale supplies 12 colours; more than 12 distinct
# counts would exceed it -- confirm against the data.
g5 <- ggplot(data = GSS_sums_FFFm) +
  geom_sf(mapping = aes(fill = as.factor(sum_FFF_events)), lwd = .1) +
  scale_fill_manual(
    name = "# FFF events",
    values = c(
      "#ffffff", "#e9f7d0", "#d5e8b3", "#c2da99",
      "#b0ca81", "#96b462", "#83a545",
      "#709330", "#587a1b", "#46660c",
      "#365105", "#294001"
    )
  ) +
  ggtitle("FFF Event map") +
  theme_tufte(base_family = "Helvetica") +
  theme(
    legend.title = element_text(size = 15),
    legend.text = element_text(size = 15),
    plot.title = element_text(size = 20, face = "bold")
  )


# Total number of tweets per day.
# NOTE(review): the original comment states there are no missing dates, so no
# date completion is done here -- not verifiable from this script.
totals <- MPtweets %>%
  mutate(obs = 1) %>%
  group_by(date) %>%
  summarise(sum_tweets = sum(obs))

# Daily share of tweets matching `climterms`, drawn as a line.
# Days without any climate tweet only appear through the full join with
# `totals` and are set to a count of 0.
# NOTE(review): the y scale is limited to 0-5%; with ggplot2's default
# out-of-bounds handling, days above 5% are dropped from the line -- confirm
# this is intended.
g1 <- MPtweets %>%
  mutate(obs = 1) %>%
  filter(grepl(climterms, tweet, ignore.case = TRUE)) %>%
  group_by(date) %>%
  summarise(sum_ctweets = sum(obs)) %>%
  full_join(totals, by = "date") %>%
  mutate(sum_ctweets = ifelse(is.na(sum_ctweets), 0, sum_ctweets),
         pctclim = sum_ctweets / sum_tweets) %>%
  arrange(date) %>%
  ggplot(aes(date, pctclim)) +
  # geom_smooth(method= lm, alpha=0.2, fill=c("darkgreen"), col="black") +
  geom_line(col = "darkgreen") +
  # geom_point(col="darkgreen", alpha=0.2, size=1) +
  theme_tufte(base_family = "Helvetica") +
  labs(x = "", y = "% climate tweets", title = "Tweets") +
  scale_y_continuous(labels = scales::percent_format(accuracy = .1),
                     expand = c(0, 0), limits = c(0, .05)) +
  theme(plot.title = element_text(size = 20, face = "bold"),
        axis.text.x = element_blank(),
        axis.title.x = element_blank(),
        axis.title.y = element_text(size = 15),
        axis.text.y = element_text(size = 15))

# Total number of speeches per day. complete() adds a row for every calendar
# day between 2017-06-08 and 2019-12-12 that has no speeches, with a count of 0.
# NOTE(review): assumes speech_date was parsed as Date by read_csv() so it
# matches the seq.Date() values -- confirm.
MPtotalspchs <- speeches %>%
  mutate(obs = 1) %>%
  group_by(speech_date) %>%
  summarise(sum_spchs = sum(obs)) %>%
  complete(speech_date = seq.Date(as.Date("2017-06-08"), as.Date("2019-12-12"),
                                  by = "day")) %>%
  mutate(sum_spchs = ifelse(is.na(sum_spchs), 0, sum_spchs))

# Daily share of speeches matching `climterms`, drawn as a line.
# Days with no speeches at all give 0/0 = NaN, which is mapped to 0.
# NOTE(review): the y scale is limited to 0-5%; with ggplot2's default
# out-of-bounds handling, days above 5% are dropped from the line -- confirm
# this is intended.
g3 <- speeches %>%
  mutate(obs = 1) %>%
  filter(grepl(climterms, speech_text, ignore.case = TRUE)) %>%
  group_by(speech_date) %>%
  summarise(sum_cspchs = sum(obs)) %>%
  full_join(MPtotalspchs, by = "speech_date") %>%
  mutate(sum_cspchs = ifelse(is.na(sum_cspchs), 0, sum_cspchs),
         pctclimh = sum_cspchs / sum_spchs,
         pctclimh = ifelse(is.nan(pctclimh), 0, pctclimh)) %>%
  arrange(speech_date) %>%
  ggplot(aes(speech_date, pctclimh)) +
  # geom_smooth(method= lm, alpha=0.2, fill=c("darkgreen"), col="black") +
  geom_line(col = "darkgreen") +
  # geom_point(col="darkgreen", alpha=0.2, size=1) +
  theme_tufte(base_family = "Helvetica") +
  labs(x = "", y = "% climate speeches", title = "Speeches") +
  scale_y_continuous(labels = scales::percent_format(accuracy = .1),
                     expand = c(0, 0), limits = c(0, .05)) +
  theme(plot.title = element_text(size = 20, face = "bold"),
        axis.text.x = element_blank(),
        axis.title.x = element_blank(),
        axis.title.y = element_text(size = 15),
        axis.text.y = element_text(size = 15))

# Total tweets per MP, carrying party, full name and gender along as keys.
# ungroup() because summarise() only removes the last grouping variable.
MPtotals <- MPtweets %>%
  mutate(obs = 1) %>%
  group_by(username, party_value, full_name_value, gender_value) %>%
  summarise(sum_tweets = sum(obs)) %>%
  ungroup()

# Share of each MP's tweets that match `climterms`; MPs without any climate
# tweet enter through the full join and get a count of 0.
# NOTE(review): the join is on username only. If a username appears with more
# than one party/name/gender combination in MPtotals, its climate count is
# repeated on each of those rows -- confirm usernames are unique there.
MPpctclim <- MPtweets %>%
  mutate(obs = 1) %>%
  filter(grepl(climterms, tweet, ignore.case = TRUE)) %>%
  group_by(username) %>%
  summarise(sum_ctweets = sum(obs)) %>%
  full_join(MPtotals, by = "username") %>%
  mutate(sum_ctweets = ifelse(is.na(sum_ctweets), 0, sum_ctweets),
         pctclim = sum_ctweets / sum_tweets)

# Fold "Labour (Co-op)" into "Labour" before counting speeches by party
speeches$party[speeches$party == "Labour (Co-op)"] <- "Labour"

# Total speeches per `about` value and party. This overwrites the earlier
# per-day object of the same name.
# NOTE(review): `about` presumably identifies the MP who gave the speech -- confirm.
MPtotalspchs <- speeches %>%
  mutate(obs = 1) %>%
  group_by(about, party) %>%
  summarise(sum_spchs = sum(obs)) %>%
  ungroup()

# Share of speeches matching `climterms` per `about` value.
# NOTE(review): joined on `about` only; an `about` value listed under more than
# one party in MPtotalspchs would have its climate count repeated -- confirm.
MPpctclimh <- speeches %>%
  mutate(obs = 1) %>%
  filter(grepl(climterms, speech_text, ignore.case = TRUE)) %>%
  group_by(about) %>%
  summarise(sum_cspchs = sum(obs)) %>%
  full_join(MPtotalspchs, by = "about") %>%
  mutate(sum_cspchs = ifelse(is.na(sum_cspchs), 0, sum_cspchs),
         pctclimh = sum_cspchs / sum_spchs)

# Stack the per-MP climate shares for tweets and speeches into one long table
# (Labour and Conservative only). Despite its name, `lpctclim` holds the square
# root of the percentage, not a logarithm.
MPthist <- MPpctclim %>%
  mutate(pctclim = pctclim * 100,
         lpctclim = sqrt(pctclim)) %>%
  filter(party_value %in% c("Labour", "Conservative")) %>%
  mutate(type = "Tweets", party = party_value) %>%
  dplyr::select(party, lpctclim, type)

MPshist <- MPpctclimh %>%
  mutate(pctclim = pctclimh * 100,
         lpctclim = sqrt(pctclim)) %>%
  filter(party %in% c("Labour", "Conservative")) %>%
  mutate(type = "Speeches") %>%
  dplyr::select(party, lpctclim, type)

MPhistdat <- rbind(MPthist, MPshist)

# Box plot with jittered points of the sqrt-scaled climate share by party,
# one facet per medium (Speeches / Tweets).
g7 <- ggplot(MPhistdat, aes(x = factor(party), y = lpctclim)) +
  geom_boxplot() +
  geom_quasirandom(aes(color = party, shape = party),
                   alpha = .8, size = 2, width = .4) +
  # Colours are named by party so the mapping does not depend on the order (or
  # presence) of the factor levels.
  scale_color_manual(values = c("Conservative" = "#0087DC",
                                "Labour" = "#DC241f"),
                     name = "Party") +
  theme_tufte(base_family = "Helvetica") +
  xlab("") +
  ylab("% Climate speech/tweets (sqrt.)") +
  # same legend title for shape and colour so both are shown in one legend
  labs(shape = "Party") +
  facet_wrap(~ type) +
  theme(strip.text = element_text(size = 20, face = "bold"),
        legend.position = c(0.8, 0.8),
        legend.text = element_text(size = 20),
        legend.title = element_text(size = 20),
        axis.title.x = element_text(size = 15),
        axis.text.x = element_text(size = 15),
        axis.title.y = element_text(size = 15),
        axis.text.y = element_text(size = 15))

#COMBINE ALL PLOTS
# Panel layout:
#   left column  (ggg1): A = FFF events over time (g0),
#                        B = speeches: daily share (g3) above MP-day tiles (g4),
#                        C = tweets:   daily share (g1) above MP-day tiles (g2)
#   right column (ggg2): D = FFF event map (g5), E = party comparison (g7)
gg0 <- plot_grid(g0, labels = c("A"))

gg1 <- ggarrange(g3, g4, ncol = 1, nrow = 2, align = "v")
gg1 <- plot_grid(gg1, labels = c("B"), ncol = 1)

gg2 <- ggarrange(g1, g2, ncol = 1, nrow = 2, align = "v")
gg2 <- plot_grid(gg2, labels = c("C"), ncol = 1)

gg3 <- plot_grid(g5, labels = c("D"), ncol = 1)
gg4 <- plot_grid(g7, labels = c("E"), ncol = 1)

ggg1 <- plot_grid(gg0, gg1, gg2, ncol = 1)
ggg2 <- plot_grid(gg3, gg4, ncol = 1)

# Write the combined figure to a 550 x 350 mm PNG at 300 dpi.
png(
  "plots/fig1.png",
  width = 550,
  height = 350,
  units = "mm",
  res = 300
)
# Explicit print(): top-level values are not auto-printed when the script is
# run through source(), which would otherwise leave the PNG empty.
print(plot_grid(ggg1, ggg2, ncol = 2))
dev.off()
